Detecting Brain Tumors¶
Imports¶
In [ ]:
import os
import numpy as np
import pandas as pd
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import random
import shutil
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import tensorflow as tf
In [ ]:
# Ensure the output directory for all saved figures exists.
# exist_ok=True replaces the racy exists()/makedirs() pair.
os.makedirs('./plots', exist_ok=True)
Load Dataset¶
In [ ]:
# Load the precomputed per-image feature table (one row per MRI image).
dff = pd.read_csv('data/Brain_Tumor.csv')
In [ ]:
# Peek at the first rows to sanity-check the load.
dff.head()
Out[ ]:
| Image | Class | Mean | Variance | Standard Deviation | Entropy | Skewness | Kurtosis | Contrast | Energy | ASM | Homogeneity | Dissimilarity | Correlation | Coarseness | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Image1 | 0 | 6.535339 | 619.587845 | 24.891522 | 0.109059 | 4.276477 | 18.900575 | 98.613971 | 0.293314 | 0.086033 | 0.530941 | 4.473346 | 0.981939 | 7.458341e-155 |
| 1 | Image2 | 0 | 8.749969 | 805.957634 | 28.389393 | 0.266538 | 3.718116 | 14.464618 | 63.858816 | 0.475051 | 0.225674 | 0.651352 | 3.220072 | 0.988834 | 7.458341e-155 |
| 2 | Image3 | 1 | 7.341095 | 1143.808219 | 33.820234 | 0.001467 | 5.061750 | 26.479563 | 81.867206 | 0.031917 | 0.001019 | 0.268275 | 5.981800 | 0.978014 | 7.458341e-155 |
| 3 | Image4 | 1 | 5.958145 | 959.711985 | 30.979219 | 0.001477 | 5.677977 | 33.428845 | 151.229741 | 0.032024 | 0.001026 | 0.243851 | 7.700919 | 0.964189 | 7.458341e-155 |
| 4 | Image5 | 0 | 7.315231 | 729.540579 | 27.010009 | 0.146761 | 4.283221 | 19.079108 | 174.988756 | 0.343849 | 0.118232 | 0.501140 | 6.834689 | 0.972789 | 7.458341e-155 |
In [ ]:
# Summary statistics; note Coarseness is constant (std == 0), so it carries no signal.
dff.describe()
Out[ ]:
| Class | Mean | Variance | Standard Deviation | Entropy | Skewness | Kurtosis | Contrast | Energy | ASM | Homogeneity | Dissimilarity | Correlation | Coarseness | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 3762.000000 | 3762.000000 | 3762.000000 | 3762.000000 | 3762.000000 | 3762.000000 | 3762.000000 | 3762.000000 | 3762.000000 | 3762.000000 | 3762.000000 | 3762.000000 | 3762.000000 | 3.762000e+03 |
| mean | 0.447368 | 9.488890 | 711.101063 | 25.182271 | 0.073603 | 4.102727 | 24.389071 | 127.961459 | 0.204705 | 0.058632 | 0.479252 | 4.698498 | 0.955767 | 7.458341e-155 |
| std | 0.497288 | 5.728022 | 467.466896 | 8.773526 | 0.070269 | 2.560940 | 56.434747 | 109.499601 | 0.129352 | 0.058300 | 0.127929 | 1.850173 | 0.026157 | 0.000000e+00 |
| min | 0.000000 | 0.078659 | 3.145628 | 1.773592 | 0.000882 | 1.886014 | 3.942402 | 3.194733 | 0.024731 | 0.000612 | 0.105490 | 0.681121 | 0.549426 | 7.458341e-155 |
| 25% | 0.000000 | 4.982395 | 363.225459 | 19.058475 | 0.006856 | 2.620203 | 7.252852 | 72.125208 | 0.069617 | 0.004847 | 0.364973 | 3.412363 | 0.947138 | 7.458341e-155 |
| 50% | 0.000000 | 8.477531 | 622.580417 | 24.951560 | 0.066628 | 3.422210 | 12.359088 | 106.737418 | 0.225496 | 0.050849 | 0.512551 | 4.482404 | 0.961610 | 7.458341e-155 |
| 75% | 1.000000 | 13.212723 | 966.954319 | 31.095889 | 0.113284 | 4.651737 | 22.640304 | 161.059006 | 0.298901 | 0.089342 | 0.575557 | 5.723821 | 0.971355 | 7.458341e-155 |
| max | 1.000000 | 33.239975 | 2910.581879 | 53.949809 | 0.394539 | 36.931294 | 1371.640060 | 3382.574163 | 0.589682 | 0.347725 | 0.810921 | 27.827751 | 0.989972 | 7.458341e-155 |
In [ ]:
# drop Image feature
# The image file name is a non-numeric identifier; keep Class + 13 numeric features.
df = dff.drop(columns=['Image'])
In [ ]:
# Confirm the remaining columns after the drop.
df.columns
Out[ ]:
Index(['Class', 'Mean', 'Variance', 'Standard Deviation', 'Entropy',
'Skewness', 'Kurtosis', 'Contrast', 'Energy', 'ASM', 'Homogeneity',
'Dissimilarity', 'Correlation', 'Coarseness'],
dtype='object')
Data Exploration¶
In [ ]:
# Histograms of the 13 numeric features (columns 1..13; column 0 is Class).
df[df.columns[1:14]].hist(alpha=0.8, figsize=(20, 20))
plt.savefig('plots/distributions.pdf')
In [ ]:
# Per-class overlaid histograms for each of the 13 numeric features.
size = (20, 15)
for i in range(1, 14):
    plt.subplot(4, 4, i)
    df.groupby('Class')[df.columns[i]].hist(alpha=0.6, figsize=size)
    # groupby('Class') iterates classes in ascending order (0 first), and
    # Class 0 is benign / Class 1 malignant (see the Benign/Malignant
    # directory split and class_names below), so the legend must list
    # benign first -- the original had the two labels swapped.
    plt.legend(['benign', 'malignant'])
    plt.xlabel(df.columns[i])
plt.tight_layout()
plt.savefig('plots/benign_malignant_comparison.pdf')
In [ ]:
# Pairwise scatter matrix of all 13 features, points colored by tumor class.
scatter_matrix(df[df.columns[1:14]], c=df['Class'], alpha=0.8, figsize=(30, 30), s=20)
plt.tight_layout()
# plt.show()
plt.savefig('plots/scatter_matrix.pdf', dpi=50)
In [ ]:
# Correlation heatmap over Class and the first 12 features.
# NOTE(review): columns[0:13] excludes Coarseness; it is constant (std == 0),
# so its correlation would be undefined anyway.
plt.figure(figsize=(20,20))
sns.heatmap(df[df.columns[0:13]].corr(), annot=True, square=True, cmap='coolwarm')
plt.tight_layout()
# plt.show()
plt.savefig('plots/correlation.pdf')
Preprocessing¶
In [ ]:
# test-train split
# Features = the 13 numeric columns; target = the binary Class label.
X, y = np.array(df.iloc[:, 1:14]), df['Class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)
In [ ]:
# scaling
# Fit the scaler on the training split only, then apply to both splits,
# so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
Models¶
SVM¶
In [ ]:
# Support-vector classifier with sklearn defaults (RBF kernel);
# fit returns the estimator, so fit/predict can be chained.
SVM = SVC()
svm_pred = SVM.fit(X_train, y_train).predict(X_test)
In [ ]:
# Held-out accuracy of the SVM.
print('Accuracy: %.4f' % accuracy_score(y_test, svm_pred))
Accuracy: 0.9801
Logistic Regression¶
In [ ]:
# Logistic Regression
# Fixed seed keeps the solver reproducible; fit returns the estimator,
# so fitting and predicting can be chained.
lr = LogisticRegression(random_state=42)
lr_pred = lr.fit(X_train, y_train).predict(X_test)
In [ ]:
# Held-out accuracy of the logistic regression.
print('Accuracy: %.4f' % accuracy_score(y_test, lr_pred))
Accuracy: 0.9788
kNN¶
In [ ]:
# kNN
# k-nearest-neighbours with the sklearn default of 5 neighbours;
# fit returns the estimator, so fit/predict chain directly.
knn = KNeighborsClassifier()
knn_pred = knn.fit(X_train, y_train).predict(X_test)
In [ ]:
# Held-out accuracy of the kNN classifier.
print('Accuracy: %.4f' % accuracy_score(y_test, knn_pred))
Accuracy: 0.9761
In [ ]:
# move files from Brain_Tumor directory to Malignant and Benign directories
#for i in range(0, 3762):
# if df['Class'][i]==0:
# shutil.move(f'data/Brain_Tumor/Brain_Tumor/Image{i+1}.jpg', f'data/Brain_Tumor/Benign/Image{i+1}.jpg')
# else:
# shutil.move(f'data/Brain_Tumor/Brain_Tumor/Image{i+1}.jpg', f'data/Brain_Tumor/Malignant/Image{i+1}.jpg')
In [ ]:
# Image dimensions expected by the CNN and the batch size for all datasets.
img_height = 240
img_width = 240
batch_size = 32
data_dir = 'data/Brain_Tumor'
# get train dataset
# 80/20 train/validation split; the same seed is used for the validation
# call below so the two subsets partition the files identically.
train_ds = tf.keras.utils.image_dataset_from_directory(
data_dir,
validation_split=0.2,
subset="training",
seed=123,
image_size=(img_height, img_width),
batch_size=batch_size)
Found 3010 files belonging to 2 classes. Using 2408 files for training.
In [ ]:
# get validation dataset
# Same seed/split as the training call above, so this yields the
# complementary 20% of the files.
val_ds = tf.keras.utils.image_dataset_from_directory(
data_dir,
validation_split=0.2,
subset="validation",
seed=123,
image_size=(img_height, img_width),
batch_size=batch_size)
Found 3010 files belonging to 2 classes. Using 602 files for validation.
In [ ]:
# get test dataset
# shuffle=False keeps directory/file order, which the positional label
# reconstruction further below relies on.
test_ds = tf.keras.utils.image_dataset_from_directory(
'data/test_data',
shuffle=False,
image_size=(img_height, img_width),
batch_size=batch_size)
Found 752 files belonging to 2 classes.
In [ ]:
# Directory names double as class labels: index 0 = 'Benign', 1 = 'Malignant'.
class_names = train_ds.class_names
print(class_names)
['Benign', 'Malignant']
In [ ]:
# Preview nine images from the first training batch with their class labels.
# NOTE(review): `images`/`labels` leak into module scope here and are later
# referenced inside false_predictions.
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
for i in range(9):
ax = plt.subplot(3, 3, i + 1)
plt.imshow(images[i].numpy().astype("uint8"))
plt.title(class_names[labels[i]])
plt.axis("off")
plt.savefig('plots/tumor_images.pdf')
2024-07-24 12:48:45.061036: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [2408]
[[{{node Placeholder/_4}}]]
2024-07-24 12:48:45.061500: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [2408]
[[{{node Placeholder/_4}}]]
In [ ]:
# Small sequential CNN: rescale to [0,1], three Conv/MaxPool stages, dropout,
# then two Dense layers. The final Dense(2) emits raw logits, matched by
# from_logits=True in the loss below.
CNN = tf.keras.Sequential([
tf.keras.layers.Rescaling(1./255),
tf.keras.layers.Conv2D(24, 3, activation='relu'),
tf.keras.layers.MaxPooling2D(),
tf.keras.layers.Conv2D(12, 3, activation='relu'),
tf.keras.layers.MaxPooling2D(),
# tf.keras.layers.Dropout(0.5),
tf.keras.layers.Conv2D(8, 3, activation='relu'),
tf.keras.layers.MaxPooling2D(),
tf.keras.layers.Flatten(),
tf.keras.layers.Dropout(0.5),
tf.keras.layers.Dense(4, activation='relu'),
tf.keras.layers.Dense(2)
])
optimizer = tf.optimizers.Adam(learning_rate=0.005)
CNN.compile(
optimizer=optimizer,
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.Adam` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.Adam`. WARNING:absl:There is a known slowdown when using v2.11+ Keras optimizers on M1/M2 Macs. Falling back to the legacy Keras optimizer, i.e., `tf.keras.optimizers.legacy.Adam`.
In [ ]:
# Train for 30 epochs; the returned History object is plotted later.
CNN_hist = CNN.fit(
train_ds,
validation_data=val_ds,
epochs=30
)
Epoch 1/30 76/76 [==============================] - ETA: 0s - loss: 0.4832 - accuracy: 0.7961
2024-07-24 12:49:12.673031: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [602]
[[{{node Placeholder/_4}}]]
2024-07-24 12:49:12.673217: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [602]
[[{{node Placeholder/_4}}]]
76/76 [==============================] - 17s 229ms/step - loss: 0.4832 - accuracy: 0.7961 - val_loss: 0.4617 - val_accuracy: 0.8123 Epoch 2/30 76/76 [==============================] - 17s 222ms/step - loss: 0.4312 - accuracy: 0.8227 - val_loss: 0.4096 - val_accuracy: 0.8571 Epoch 3/30 76/76 [==============================] - 17s 226ms/step - loss: 0.4072 - accuracy: 0.8447 - val_loss: 0.3809 - val_accuracy: 0.8771 Epoch 4/30 76/76 [==============================] - 17s 222ms/step - loss: 0.3701 - accuracy: 0.8468 - val_loss: 0.3521 - val_accuracy: 0.8804 Epoch 5/30 76/76 [==============================] - 17s 219ms/step - loss: 0.3209 - accuracy: 0.8783 - val_loss: 0.3345 - val_accuracy: 0.9037 Epoch 6/30 76/76 [==============================] - 17s 218ms/step - loss: 0.2902 - accuracy: 0.8866 - val_loss: 0.3059 - val_accuracy: 0.8837 Epoch 7/30 76/76 [==============================] - 19s 252ms/step - loss: 0.2536 - accuracy: 0.9037 - val_loss: 0.2663 - val_accuracy: 0.9103 Epoch 8/30 76/76 [==============================] - 18s 236ms/step - loss: 0.2587 - accuracy: 0.8978 - val_loss: 0.2503 - val_accuracy: 0.9136 Epoch 9/30 76/76 [==============================] - 21s 278ms/step - loss: 0.2147 - accuracy: 0.9257 - val_loss: 0.2597 - val_accuracy: 0.9086 Epoch 10/30 76/76 [==============================] - 18s 229ms/step - loss: 0.1947 - accuracy: 0.9294 - val_loss: 0.2579 - val_accuracy: 0.9269 Epoch 11/30 76/76 [==============================] - 17s 227ms/step - loss: 0.1673 - accuracy: 0.9373 - val_loss: 0.2659 - val_accuracy: 0.9020 Epoch 12/30 76/76 [==============================] - 18s 230ms/step - loss: 0.1583 - accuracy: 0.9406 - val_loss: 0.2383 - val_accuracy: 0.9385 Epoch 13/30 76/76 [==============================] - 18s 231ms/step - loss: 0.1545 - accuracy: 0.9435 - val_loss: 0.2508 - val_accuracy: 0.9120 Epoch 14/30 76/76 [==============================] - 17s 228ms/step - loss: 0.1321 - accuracy: 0.9522 - val_loss: 0.2422 - val_accuracy: 0.9219 
Epoch 15/30 76/76 [==============================] - 18s 237ms/step - loss: 0.1261 - accuracy: 0.9522 - val_loss: 0.2567 - val_accuracy: 0.9252 Epoch 16/30 76/76 [==============================] - 18s 231ms/step - loss: 0.1317 - accuracy: 0.9498 - val_loss: 0.2468 - val_accuracy: 0.9219 Epoch 17/30 76/76 [==============================] - 19s 256ms/step - loss: 0.1308 - accuracy: 0.9556 - val_loss: 0.2254 - val_accuracy: 0.9269 Epoch 18/30 76/76 [==============================] - 19s 253ms/step - loss: 0.1240 - accuracy: 0.9514 - val_loss: 0.2364 - val_accuracy: 0.9219 Epoch 19/30 76/76 [==============================] - 18s 240ms/step - loss: 0.1063 - accuracy: 0.9622 - val_loss: 0.2182 - val_accuracy: 0.9302 Epoch 20/30 76/76 [==============================] - 18s 232ms/step - loss: 0.0902 - accuracy: 0.9647 - val_loss: 0.2117 - val_accuracy: 0.9302 Epoch 21/30 76/76 [==============================] - 18s 230ms/step - loss: 0.0770 - accuracy: 0.9784 - val_loss: 0.2503 - val_accuracy: 0.9336 Epoch 22/30 76/76 [==============================] - 17s 228ms/step - loss: 0.0882 - accuracy: 0.9697 - val_loss: 0.2033 - val_accuracy: 0.9336 Epoch 23/30 76/76 [==============================] - 18s 230ms/step - loss: 0.0727 - accuracy: 0.9759 - val_loss: 0.2681 - val_accuracy: 0.9219 Epoch 24/30 76/76 [==============================] - 18s 230ms/step - loss: 0.0688 - accuracy: 0.9763 - val_loss: 0.2619 - val_accuracy: 0.9319 Epoch 25/30 76/76 [==============================] - 18s 232ms/step - loss: 0.1160 - accuracy: 0.9564 - val_loss: 0.2893 - val_accuracy: 0.9269 Epoch 26/30 76/76 [==============================] - 18s 235ms/step - loss: 0.0900 - accuracy: 0.9705 - val_loss: 0.2947 - val_accuracy: 0.9203 Epoch 27/30 76/76 [==============================] - 18s 236ms/step - loss: 0.0822 - accuracy: 0.9738 - val_loss: 0.2349 - val_accuracy: 0.9269 Epoch 28/30 76/76 [==============================] - 18s 230ms/step - loss: 0.0662 - accuracy: 0.9784 - val_loss: 0.2949 - 
val_accuracy: 0.9302 Epoch 29/30 76/76 [==============================] - 18s 235ms/step - loss: 0.0695 - accuracy: 0.9821 - val_loss: 0.2012 - val_accuracy: 0.9336 Epoch 30/30 76/76 [==============================] - 18s 241ms/step - loss: 0.0691 - accuracy: 0.9730 - val_loss: 0.2392 - val_accuracy: 0.9336
In [ ]:
# Persist the trained CNN as a TensorFlow SavedModel directory.
# exist_ok=True replaces the racy exists()/makedirs() pair.
os.makedirs('./models', exist_ok=True)
CNN.save('./models/CNN')
2024-07-23 18:56:42.882581: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,6272]
[[{{node inputs}}]]
2024-07-23 18:56:43.006320: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,6272]
[[{{node inputs}}]]
WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 3 of 3). These functions will not be directly callable after loading.
INFO:tensorflow:Assets written to: ./models/CNN/assets
INFO:tensorflow:Assets written to: ./models/CNN/assets
Evaluation¶
In [ ]:
import itertools
from sklearn.metrics import r2_score, classification_report, mean_squared_error, mean_absolute_error, confusion_matrix
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.

    Parameters
    ----------
    cm : array-like of shape (n_classes, n_classes)
        Confusion matrix, e.g. from sklearn's confusion_matrix.
    classes : sequence of str
        Tick labels, in the same row/column order as `cm`.
    normalize : bool
        If True, each row is converted to proportions before plotting.
    title : str
        Plot title.
    cmap : matplotlib colormap
        Colormap for the matrix image.
    """
    # Normalize BEFORE drawing so the image and the cell text agree;
    # the original normalized after imshow, so with normalize=True the
    # colors showed raw counts while the text showed proportions.
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 # white text on dark cells, black on light ones
                 color="white" if cm[i, j] > thresh else "black")
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
def evaluate_model(test, predict, model):
    """Plot a confusion matrix and a class-count histogram for `model`.

    Parameters
    ----------
    test, predict : array-like of 0/1 labels
        True and predicted class labels.
    model : str
        Model name used in plot titles and output file names.
    """
    conf_mtx = confusion_matrix(test, predict)
    # Row/column 0 of the confusion matrix is Class 0 = benign (Class==0
    # images were sorted into the Benign directory), so labels are listed
    # benign first -- the original listed them in the reverse order.
    plot_confusion_matrix(cm=conf_mtx, classes=['benign', 'malignant'], title=f'Confusion matrix for {model}')
    plt.savefig(f'plots/confusion_matrix_{model}.pdf')
    plt.show()
    # (removed a no-op `predict.reshape(-1)` whose result was discarded)
    # Filled histogram = true labels, outlined step histogram = predictions.
    plt.hist(test, alpha=0.5, color='red', range=[0, 1], bins=2)
    plt.hist(predict, alpha=0.5, color='red', range=[0, 1], bins=2, histtype='step', linewidth=2)
    plt.tight_layout()
    plt.xlabel('Class')
    plt.ylabel('Number')
    plt.xticks([0, 1], ['benign', 'malignant'])
    plt.title(f'Prediction distribution for {model}')
    # Legend order must match plot order (true labels drawn first);
    # the original legend listed the two entries reversed.
    plt.legend(['testing true', 'testing prediction'], loc='upper right')
    plt.savefig(f'plots/histogram_{model}.pdf')
    plt.show()
def false_predictions(test, predict, model):
    """Show up to six misclassified samples with their predicted class.

    NOTE(review): `images` here is the module-level leftover from the
    training-batch preview cell, so the pictures shown are the first training
    images, not the actual misclassified test images -- the test images for
    the indices in `errors` would need to be passed in to fix this properly.
    """
    # indices of (at most six) misclassified samples
    errors = np.where(test != predict)[0][:6]
    fig, axs = plt.subplots(2, 3, figsize=(12, 8))
    axs = axs.ravel()
    for i, err in enumerate(errors):
        axs[i].imshow(images[i].numpy().astype("uint8"), cmap='gray')
        # Title shows the PREDICTED class. Prediction 0 = benign,
        # 1 = malignant (see the directory split / class_names); the
        # original had the two titles swapped relative to that mapping.
        if predict[err] == 0:
            axs[i].set_title("Benign")
        else:
            axs[i].set_title("Malignant")
        axs[i].axis('off')
    fig.tight_layout()
    # The f-prefix was missing, so the title literally read
    # "{model} False Predictions".
    plt.title(f'{model} False Predictions')
    plt.savefig(f'plots/false_predictions_{model}.pdf')
    plt.show()
def adv_classifications(test, predict):
# Precision/recall/F1 per class plus regression-style error metrics.
# On 0/1 labels the squared and absolute errors coincide, so MSE == MAE
# == the error rate (visible in the printed outputs below).
print(classification_report(test, predict))
print('Coefficient of determination: %.4f' % r2_score(test, predict))
# The mean squared error
print("Mean squared error: %.4f" % mean_squared_error(test, predict))
# The mean absolute error
print("Mean absolute error: %.4f" % mean_absolute_error(test, predict))
def full_evaluation(test, predict, model, images=False):
    """Print accuracy and detailed metrics, then draw the evaluation plots.

    Parameters
    ----------
    test, predict : array-like of 0/1 labels
        True and predicted class labels.
    model : str
        Model name used in titles and file names.
    images : bool, optional
        If True, also plot example misclassifications.
    """
    print('Accuracy: %.4f' % accuracy_score(test, predict))
    adv_classifications(test, predict)
    evaluate_model(test, predict, model)
    # truthiness test instead of the non-idiomatic `== True` comparison
    if images:
        false_predictions(test, predict, model)
SVM¶
In [ ]:
# Full evaluation of the SVM on the held-out feature split.
full_evaluation(y_test, svm_pred, 'SVM')
Accuracy: 0.9801
precision recall f1-score support
0 0.97 1.00 0.98 414
1 0.99 0.96 0.98 339
accuracy 0.98 753
macro avg 0.98 0.98 0.98 753
weighted avg 0.98 0.98 0.98 753
Coefficient of determination: 0.9195
Mean squared error: 0.0199
Mean absolute error: 0.0199
Logistic Regression¶
In [ ]:
# Full evaluation of the logistic regression on the held-out feature split.
# evaluate_model(y_test, lr_pred, 'LR')
full_evaluation(y_test, lr_pred, 'LR')
Accuracy: 0.9788
precision recall f1-score support
0 0.97 0.99 0.98 414
1 0.99 0.96 0.98 339
accuracy 0.98 753
macro avg 0.98 0.98 0.98 753
weighted avg 0.98 0.98 0.98 753
Coefficient of determination: 0.9142
Mean squared error: 0.0212
Mean absolute error: 0.0212
KNN¶
In [ ]:
# Full evaluation of the kNN classifier on the held-out feature split.
# evaluate_model(y_test, knn_pred, 'kNN')
full_evaluation(y_test, knn_pred, 'kNN')
Accuracy: 0.9761
precision recall f1-score support
0 0.96 1.00 0.98 414
1 1.00 0.95 0.97 339
accuracy 0.98 753
macro avg 0.98 0.97 0.98 753
weighted avg 0.98 0.98 0.98 753
Coefficient of determination: 0.9034
Mean squared error: 0.0239
Mean absolute error: 0.0239
CNN¶
In [ ]:
# Reload the persisted CNN from disk (sanity-checks the SavedModel round trip).
CNN_loaded = tf.keras.models.load_model("models/CNN")
WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.RestoredOptimizer` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.RestoredOptimizer`. WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.Adam` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.Adam`. WARNING:absl:There is a known slowdown when using v2.11+ Keras optimizers on M1/M2 Macs. Falling back to the legacy Keras optimizer, i.e., `tf.keras.optimizers.legacy.Adam`.
In [ ]:
def plot_history(network_history, name='CNN'):
    """Plot training/validation loss and accuracy curves and save them.

    Parameters
    ----------
    network_history : keras History object returned by Model.fit.
    name : str, optional
        Label used in the output file name (default 'CNN'). The original
        interpolated the History object's repr into the file name, yielding
        names like "<keras.callbacks.History object at 0x...>-history.pdf".
    """
    fig, axs = plt.subplots(2, 1, figsize=(10, 10))
    # top panel: loss curves
    axs[0].plot(network_history.history['loss'])
    axs[0].plot(network_history.history['val_loss'])
    axs[0].set_title('Model Loss')
    axs[0].set_ylabel('Loss')
    axs[0].set_xlabel('Epoch')
    axs[0].legend(['train', 'validation'], loc='upper right')
    # bottom panel: accuracy curves
    axs[1].plot(network_history.history['accuracy'])
    axs[1].plot(network_history.history['val_accuracy'])
    axs[1].set_title('Model Accuracy')
    axs[1].set_ylabel('Accuracy')
    axs[1].set_xlabel('Epoch')
    axs[1].legend(['train', 'validation'], loc='lower right')
    plt.tight_layout()
    plt.savefig(f'plots/{name}-history.pdf')
In [ ]:
# Loss/accuracy curves for the CNN training run.
plot_history(CNN_hist)
In [ ]:
# Predict on the held-out test directory and take the argmax over the
# two logits to get hard class labels.
cnn_pred = CNN_loaded.predict(test_ds)
cnn_pred = cnn_pred.argmax(axis=1)
# get test labels: test_ds was built with shuffle=False, so files arrive in
# directory order -- first 336 are class 0, the remaining 416 class 1.
# NOTE(review): the 336/752 split is hard-coded; confirm it matches the
# contents of data/test_data. (np.zeros already fills with 0, so only the
# tail needs assigning -- the original also re-assigned the zero prefix.)
test_labels = np.zeros(752)
test_labels[336:] = 1
print('Accuracy: %.4f' % accuracy_score(test_labels, cnn_pred))
1/24 [>.............................] - ETA: 2s
2024-07-24 12:57:56.338696: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [752]
[[{{node Placeholder/_4}}]]
2024-07-24 12:57:56.338906: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [752]
[[{{node Placeholder/_4}}]]
24/24 [==============================] - 2s 94ms/step Accuracy: 0.9229
In [ ]:
# Full evaluation of the CNN on the positional test labels.
full_evaluation(test_labels, cnn_pred, 'CNN')
Accuracy: 0.9229
precision recall f1-score support
0.0 0.86 0.99 0.92 336
1.0 0.99 0.87 0.93 416
accuracy 0.92 752
macro avg 0.92 0.93 0.92 752
weighted avg 0.93 0.92 0.92 752
Coefficient of determination: 0.6880
Mean squared error: 0.0771
Mean absolute error: 0.0771
In [ ]:
from sklearn.metrics import roc_curve, auc
# ROC curve for the CNN predictions.
# NOTE(review): cnn_pred holds hard 0/1 labels, not scores, so this "curve"
# has a single operating point; pass the model's class probabilities/logits
# to roc_curve for a proper ROC.
fpr, tpr, thresholds = roc_curve(test_labels, cnn_pred)
roc_auc = auc(fpr, tpr)
plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.savefig('plots/roc_curve.pdf')
plt.show()
Hyperparameter optimization with Gridsearch¶
In [ ]:
# Exhaustive grid search over (conv filters, dense units, dropout rate):
# 5 x 6 x 3 = 90 configurations, each trained for 30 epochs. The best epoch
# of each run (by validation accuracy) is checkpointed to ./gridsearch.
from tensorflow.keras.callbacks import ModelCheckpoint, Callback
from tensorflow.keras.models import Sequential, load_model
if not os.path.exists('./gridsearch'):
os.makedirs('./gridsearch')
search_results = []
filters_candidates = [24, 36, 48, 60, 72]
dense_candidates = [4, 8, 12, 16, 20, 24]
dropout_candidates = [.4, .5, .6]
for nb_filters in filters_candidates:
for nb_dense in dense_candidates:
for dropout in dropout_candidates:
print(f"Start training for (filters={nb_filters} - dense={nb_dense} - dropout={dropout})")
########################################
# Same architecture as the hand-tuned CNN above, with the second and
# third conv layers scaled to 1/2 and 1/3 of nb_filters.
model = tf.keras.Sequential([
tf.keras.layers.Rescaling(1./255),
tf.keras.layers.Conv2D(nb_filters, 3, activation='relu'),
tf.keras.layers.MaxPooling2D(),
tf.keras.layers.Conv2D(int(nb_filters/2), 3, activation='relu'),
tf.keras.layers.MaxPooling2D(),
# tf.keras.layers.Dropout(dropout),
tf.keras.layers.Conv2D(int(nb_filters/3), 3, activation='relu'),
tf.keras.layers.MaxPooling2D(),
tf.keras.layers.Flatten(),
tf.keras.layers.Dropout(dropout),
tf.keras.layers.Dense(nb_dense, activation='relu'),
tf.keras.layers.Dense(2)
])
optimizer = tf.optimizers.Adam()
model.compile(
optimizer=optimizer,
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
# we choose our best model as the one having the highest validation accuracy
filepath = f"./gridsearch/cnn_paramsearch_filters_f={nb_filters}_dn={nb_dense}_do={dropout}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=0, save_best_only=True, mode='max')
fit_results = model.fit(
train_ds,
validation_data=val_ds,
batch_size=batch_size,
epochs=30,
verbose=0,
callbacks=[checkpoint],
)
# extract the best validation scores
best_val_epoch = np.argmax(fit_results.history['val_accuracy'])
best_val_acc = np.max(fit_results.history['val_accuracy'])
best_val_acc_loss = fit_results.history['val_loss'][best_val_epoch]
# get correct training accuracy by re-loading the checkpointed best epoch
best_model = load_model(filepath)
# get test labels
# NOTE(review): test_labels is recomputed on every iteration but never used
# inside this loop -- dead code (the evaluation below runs on train_ds).
test_labels = np.zeros(752)
test_labels[0:336] = 0
test_labels[336:] = 1
# test_labels.reshape(-1)
best_val_acc_train_loss, best_val_acc_train_acc = best_model.evaluate(train_ds, verbose=0)
# store results, including the full per-epoch history for later plotting
search_results.append({
'nb_filters': nb_filters,
'nb_dense': nb_dense,
'dropout': dropout,
'best_val_acc_train_acc': best_val_acc_train_acc,
'best_val_acc': best_val_acc,
'best_val_acc_train_loss': best_val_acc_train_loss,
'best_val_acc_loss': best_val_acc_loss,
'best_val_epoch': best_val_epoch,
'history': fit_results.history,
'train_loss': fit_results.history['loss']
})
Start training for (filters=24 - dense=4 - dropout=0.4) Start training for (filters=24 - dense=4 - dropout=0.5) Start training for (filters=24 - dense=4 - dropout=0.6) Start training for (filters=24 - dense=8 - dropout=0.4) Start training for (filters=24 - dense=8 - dropout=0.5) Start training for (filters=24 - dense=8 - dropout=0.6) Start training for (filters=24 - dense=12 - dropout=0.4) Start training for (filters=24 - dense=12 - dropout=0.5) Start training for (filters=24 - dense=12 - dropout=0.6) Start training for (filters=24 - dense=16 - dropout=0.4) Start training for (filters=24 - dense=16 - dropout=0.5) Start training for (filters=24 - dense=16 - dropout=0.6) Start training for (filters=24 - dense=20 - dropout=0.4) Start training for (filters=24 - dense=20 - dropout=0.5) Start training for (filters=24 - dense=20 - dropout=0.6) Start training for (filters=24 - dense=24 - dropout=0.4) Start training for (filters=24 - dense=24 - dropout=0.5) Start training for (filters=24 - dense=24 - dropout=0.6) Start training for (filters=36 - dense=4 - dropout=0.4) Start training for (filters=36 - dense=4 - dropout=0.5) Start training for (filters=36 - dense=4 - dropout=0.6) Start training for (filters=36 - dense=8 - dropout=0.4) Start training for (filters=36 - dense=8 - dropout=0.5) Start training for (filters=36 - dense=8 - dropout=0.6) Start training for (filters=36 - dense=12 - dropout=0.4) Start training for (filters=36 - dense=12 - dropout=0.5) Start training for (filters=36 - dense=12 - dropout=0.6) Start training for (filters=36 - dense=16 - dropout=0.4) Start training for (filters=36 - dense=16 - dropout=0.5) Start training for (filters=36 - dense=16 - dropout=0.6) Start training for (filters=36 - dense=20 - dropout=0.4) Start training for (filters=36 - dense=20 - dropout=0.5) Start training for (filters=36 - dense=20 - dropout=0.6) Start training for (filters=36 - dense=24 - dropout=0.4) Start training for (filters=36 - dense=24 - dropout=0.5) Start training 
for (filters=36 - dense=24 - dropout=0.6) Start training for (filters=48 - dense=4 - dropout=0.4) Start training for (filters=48 - dense=4 - dropout=0.5) Start training for (filters=48 - dense=4 - dropout=0.6) Start training for (filters=48 - dense=8 - dropout=0.4) Start training for (filters=48 - dense=8 - dropout=0.5) Start training for (filters=48 - dense=8 - dropout=0.6) Start training for (filters=48 - dense=12 - dropout=0.4) Start training for (filters=48 - dense=12 - dropout=0.5) Start training for (filters=48 - dense=12 - dropout=0.6) Start training for (filters=48 - dense=16 - dropout=0.4) Start training for (filters=48 - dense=16 - dropout=0.5) Start training for (filters=48 - dense=16 - dropout=0.6) Start training for (filters=48 - dense=20 - dropout=0.4) Start training for (filters=48 - dense=20 - dropout=0.5) Start training for (filters=48 - dense=20 - dropout=0.6) Start training for (filters=48 - dense=24 - dropout=0.4) Start training for (filters=48 - dense=24 - dropout=0.5) Start training for (filters=48 - dense=24 - dropout=0.6) Start training for (filters=60 - dense=4 - dropout=0.4) Start training for (filters=60 - dense=4 - dropout=0.5) Start training for (filters=60 - dense=4 - dropout=0.6) Start training for (filters=60 - dense=8 - dropout=0.4) Start training for (filters=60 - dense=8 - dropout=0.5) Start training for (filters=60 - dense=8 - dropout=0.6) Start training for (filters=60 - dense=12 - dropout=0.4) Start training for (filters=60 - dense=12 - dropout=0.5) Start training for (filters=60 - dense=12 - dropout=0.6) Start training for (filters=60 - dense=16 - dropout=0.4) Start training for (filters=60 - dense=16 - dropout=0.5) Start training for (filters=60 - dense=16 - dropout=0.6) Start training for (filters=60 - dense=20 - dropout=0.4) Start training for (filters=60 - dense=20 - dropout=0.5) Start training for (filters=60 - dense=20 - dropout=0.6) Start training for (filters=60 - dense=24 - dropout=0.4) Start training for (filters=60 
- dense=24 - dropout=0.5) Start training for (filters=60 - dense=24 - dropout=0.6) Start training for (filters=72 - dense=4 - dropout=0.4) Start training for (filters=72 - dense=4 - dropout=0.5) Start training for (filters=72 - dense=4 - dropout=0.6) Start training for (filters=72 - dense=8 - dropout=0.4) Start training for (filters=72 - dense=8 - dropout=0.5) Start training for (filters=72 - dense=8 - dropout=0.6) Start training for (filters=72 - dense=12 - dropout=0.4) Start training for (filters=72 - dense=12 - dropout=0.5) Start training for (filters=72 - dense=12 - dropout=0.6) Start training for (filters=72 - dense=16 - dropout=0.4) Start training for (filters=72 - dense=16 - dropout=0.5) Start training for (filters=72 - dense=16 - dropout=0.6) Start training for (filters=72 - dense=20 - dropout=0.4) Start training for (filters=72 - dense=20 - dropout=0.5) Start training for (filters=72 - dense=20 - dropout=0.6) Start training for (filters=72 - dense=24 - dropout=0.4) Start training for (filters=72 - dense=24 - dropout=0.5) Start training for (filters=72 - dense=24 - dropout=0.6)
In [ ]:
# Collect the 90 grid-search records into a DataFrame.
resultsDF = pd.DataFrame(search_results)
# sort values
# NOTE(review): sort_values returns a new frame (it is not in place); the
# sorted view is only displayed here, resultsDF itself keeps insertion order.
resultsDF.sort_values('best_val_acc', ascending=False)
Out[ ]:
| nb_filters | nb_dense | dropout | best_val_acc_train_acc | best_val_acc | best_val_acc_train_loss | best_val_acc_loss | best_val_epoch | history | train_loss | |
|---|---|---|---|---|---|---|---|---|---|---|
| 52 | 48 | 24 | 0.5 | 0.999585 | 0.953488 | 0.008526 | 0.184757 | 28 | {'loss': [0.48961323499679565, 0.3927376568317... | [0.48961323499679565, 0.39273765683174133, 0.3... |
| 26 | 36 | 12 | 0.6 | 0.985880 | 0.953488 | 0.052991 | 0.203555 | 20 | {'loss': [0.5133034586906433, 0.42175251245498... | [0.5133034586906433, 0.4217525124549866, 0.365... |
| 77 | 72 | 8 | 0.6 | 0.998754 | 0.953488 | 0.010520 | 0.208419 | 24 | {'loss': [0.5115669965744019, 0.42088097333908... | [0.5115669965744019, 0.4208809733390808, 0.371... |
| 29 | 36 | 16 | 0.6 | 0.995432 | 0.953488 | 0.027829 | 0.173891 | 29 | {'loss': [0.5065136551856995, 0.41559809446334... | [0.5065136551856995, 0.4155980944633484, 0.374... |
| 40 | 48 | 8 | 0.5 | 0.996262 | 0.953488 | 0.020991 | 0.201622 | 24 | {'loss': [0.5300318598747253, 0.45309635996818... | [0.5300318598747253, 0.4530963599681854, 0.371... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 0 | 24 | 4 | 0.4 | 0.978821 | 0.936877 | 0.099690 | 0.227430 | 26 | {'loss': [0.5506211519241333, 0.49073529243469... | [0.5506211519241333, 0.4907352924346924, 0.453... |
| 38 | 48 | 4 | 0.6 | 0.959302 | 0.935216 | 0.096160 | 0.187134 | 26 | {'loss': [0.5921011567115784, 0.50595074892044... | [0.5921011567115784, 0.5059507489204407, 0.458... |
| 6 | 24 | 12 | 0.4 | 0.981312 | 0.935216 | 0.053870 | 0.223632 | 17 | {'loss': [0.5056522488594055, 0.38887330889701... | [0.5056522488594055, 0.38887330889701843, 0.34... |
| 18 | 36 | 4 | 0.4 | 0.990033 | 0.933555 | 0.066137 | 0.297967 | 27 | {'loss': [0.5542446970939636, 0.50764954090118... | [0.5542446970939636, 0.5076495409011841, 0.476... |
| 5 | 24 | 8 | 0.6 | 0.961379 | 0.931894 | 0.101981 | 0.202954 | 22 | {'loss': [0.534759521484375, 0.447866082191467... | [0.534759521484375, 0.4478660821914673, 0.3857... |
90 rows × 10 columns
In [ ]:
# Relative gap between training and validation accuracy at the best epoch;
# larger values indicate stronger overfitting.
train_acc = resultsDF['best_val_acc_train_acc']
val_acc = resultsDF['best_val_acc']
resultsDF['delta_acc'] = (train_acc - val_acc) / val_acc
In [ ]:
# Regression pairplot: how each hyperparameter relates to validation
# accuracy, training accuracy, and the overfitting gap.
grid = sns.pairplot(
    resultsDF,
    x_vars=['nb_filters', 'nb_dense', 'dropout'],
    y_vars=['best_val_acc', 'best_val_acc_train_acc', 'delta_acc'],
    kind='reg',
    height=2,
)
plt.savefig('plots/pairplot.pdf')
In [ ]:
# Part of solution for task 3
# Let's inspect the history object:
# search_results[0]['history'].keys()
# # the entry "train_loss" was added by us in the callback, normally it is just 'loss'
# # which combinations perform best?
# resultsDF = pd.DataFrame(search_results).sort_values('best_val_acc', ascending=False)
# display(resultsDF)
# BUGFIX: resultsDF was never sorted in place (the earlier sort_values call
# was display-only), so slicing its index directly returned arbitrary rows.
# Sort explicitly by validation accuracy before taking the first three.
top_3_indices = resultsDF.sort_values('best_val_acc', ascending=False).index.values[:3]
In [ ]:
# empty plots, just to get the legend entries
# empty plots, just to get the legend entries
plt.plot([], [], 'k--', label='Training')
plt.plot([], [], 'k-', label='Validation')
print(resultsDF['history'][0].keys())
# loss curves of the three best-performing models (by validation accuracy)
top3 = resultsDF.sort_values('best_val_acc', ascending=False).head(3)
for color_idx, (_, row) in enumerate(top3.iterrows()):
    color = f'C{color_idx}'
    epochs = np.arange(1, len(row['history']['loss']) + 1)
    label = (
        f"$n_{{\\mathrm{{filter}}}}=${row['nb_filters']}, "
        f"$n_{{\\mathrm{{dense}}}}=${row['nb_dense']}, "
        f"$do=${row['dropout']}"
    )
    plt.plot(epochs, row['history']['loss'], '--', color=color)
    plt.plot(epochs, row['history']['val_loss'], '-', color=color)
    # empty fill purely to produce a coloured legend entry
    plt.fill_between([], [], [], color=color, label=label)
plt.xlabel('Epochs')
plt.ylabel('Categorical crossentropy loss')
# frameless legends keep the plot clean
plt.legend(frameon=False)
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
Out[ ]:
<matplotlib.legend.Legend at 0x7fcad449e610>
In [ ]:
# write the full grid-search results table to disk for later reuse
results_path = 'gridsearch_results.csv'
resultsDF.to_csv(results_path, index=False)
In [ ]:
# three configurations with the smallest train/validation accuracy gap
# (i.e. the least overfitting)
resultsDF.sort_values('delta_acc', ascending=True).head(3)
Out[ ]:
| nb_filters | nb_dense | dropout | best_val_acc_train_acc | best_val_acc | best_val_acc_train_loss | best_val_acc_loss | best_val_epoch | history | train_loss | delta_acc | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 24 | 4 | 0.6 | 0.948505 | 0.936877 | 0.142610 | 0.227296 | 25 | {'loss': [0.5562707185745239, 0.49141937494277... | [0.5562707185745239, 0.49141937494277954, 0.46... | 0.012411 |
| 10 | 24 | 16 | 0.5 | 0.965116 | 0.946844 | 0.102133 | 0.185263 | 20 | {'loss': [0.4974000155925751, 0.44332581758499... | [0.4974000155925751, 0.44332581758499146, 0.36... | 0.019298 |
| 38 | 48 | 4 | 0.6 | 0.959302 | 0.935216 | 0.096160 | 0.187134 | 26 | {'loss': [0.5921011567115784, 0.50595074892044... | [0.5921011567115784, 0.5059507489204407, 0.458... | 0.025755 |
In [ ]:
# empty plots, just to get the legend entries
plt.plot([], [], 'k--', label='Training')
plt.plot([], [], 'k-', label='Validation')
# loss curves of the three models with the smallest train/val accuracy gap
least_overfit = resultsDF.sort_values('delta_acc', ascending=True).head(3)
for color_idx, (_, row) in enumerate(least_overfit.iterrows()):
    color = f'C{color_idx}'
    epochs = np.arange(1, len(row['history']['loss']) + 1)
    label = (
        f"$n_{{\\mathrm{{filter}}}}=${row['nb_filters']}, "
        f"$n_{{\\mathrm{{dense}}}}=${row['nb_dense']}, "
        f"$do=${row['dropout']}"
    )
    plt.plot(epochs, row['history']['loss'], '--', color=color)
    plt.plot(epochs, row['history']['val_loss'], '-', color=color)
    # empty fill purely to produce a coloured legend entry
    plt.fill_between([], [], [], color=color, label=label)
plt.xlabel('Epochs')
plt.ylabel('Categorical crossentropy loss')
# frameless legends keep the plot clean
plt.legend(frameon=False)
Out[ ]:
<matplotlib.legend.Legend at 0x7fcad555cf10>
In [ ]:
# Rebuild and retrain the configuration selected above for its small
# train/validation accuracy gap (delta_acc ranking):
# filters=24, dense=4, dropout=0.6.
best_model = tf.keras.Sequential([
# scale raw pixel values from [0, 255] to [0, 1]
tf.keras.layers.Rescaling(1./255),
tf.keras.layers.Conv2D(24, 3, activation='relu'),
tf.keras.layers.MaxPooling2D(),
tf.keras.layers.Conv2D(12, 3, activation='relu'),
tf.keras.layers.MaxPooling2D(),
# tf.keras.layers.Dropout(0.5),
tf.keras.layers.Conv2D(8, 3, activation='relu'),
tf.keras.layers.MaxPooling2D(),
tf.keras.layers.Flatten(),
# dropout before the classifier head regularizes the dense layers
tf.keras.layers.Dropout(0.6),
tf.keras.layers.Dense(4, activation='relu'),
# two output logits (binary classification, no softmax — see from_logits below)
tf.keras.layers.Dense(2)
])
optimizer = tf.optimizers.Adam(learning_rate=0.005)
best_model.compile(
optimizer=optimizer,
# from_logits=True because the final Dense layer has no activation
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.Adam` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.Adam`. WARNING:absl:There is a known slowdown when using v2.11+ Keras optimizers on M1/M2 Macs. Falling back to the legacy Keras optimizer, i.e., `tf.keras.optimizers.legacy.Adam`.
In [ ]:
# Train for a fixed 100 epochs (no early stopping); verbose=2 prints one
# summary line per epoch. The History object is kept for plotting later.
best_model_hist = best_model.fit(
train_ds,
validation_data=val_ds,
epochs=100,
verbose=2
)
Epoch 1/100 76/76 - 17s - loss: 0.5415 - accuracy: 0.7554 - val_loss: 0.4171 - val_accuracy: 0.8040 - 17s/epoch - 229ms/step Epoch 2/100 76/76 - 17s - loss: 0.3871 - accuracy: 0.8326 - val_loss: 0.3768 - val_accuracy: 0.8306 - 17s/epoch - 223ms/step Epoch 3/100 76/76 - 17s - loss: 0.3538 - accuracy: 0.8509 - val_loss: 0.3128 - val_accuracy: 0.8704 - 17s/epoch - 222ms/step Epoch 4/100 76/76 - 17s - loss: 0.3340 - accuracy: 0.8600 - val_loss: 0.3211 - val_accuracy: 0.8738 - 17s/epoch - 220ms/step Epoch 5/100 76/76 - 16s - loss: 0.3007 - accuracy: 0.8654 - val_loss: 0.2762 - val_accuracy: 0.8937 - 16s/epoch - 216ms/step Epoch 6/100 76/76 - 16s - loss: 0.2874 - accuracy: 0.8783 - val_loss: 0.3164 - val_accuracy: 0.8688 - 16s/epoch - 204ms/step Epoch 7/100 76/76 - 15s - loss: 0.2848 - accuracy: 0.8733 - val_loss: 0.2787 - val_accuracy: 0.8804 - 15s/epoch - 196ms/step Epoch 8/100 76/76 - 15s - loss: 0.2622 - accuracy: 0.8858 - val_loss: 0.2873 - val_accuracy: 0.8787 - 15s/epoch - 204ms/step Epoch 9/100 76/76 - 17s - loss: 0.2467 - accuracy: 0.8958 - val_loss: 0.2648 - val_accuracy: 0.8887 - 17s/epoch - 218ms/step Epoch 10/100 76/76 - 14s - loss: 0.2393 - accuracy: 0.9045 - val_loss: 0.2829 - val_accuracy: 0.8804 - 14s/epoch - 182ms/step Epoch 11/100 76/76 - 14s - loss: 0.2428 - accuracy: 0.8978 - val_loss: 0.2526 - val_accuracy: 0.8953 - 14s/epoch - 187ms/step Epoch 12/100 76/76 - 15s - loss: 0.2165 - accuracy: 0.9128 - val_loss: 0.2214 - val_accuracy: 0.9120 - 15s/epoch - 199ms/step Epoch 13/100 76/76 - 17s - loss: 0.2197 - accuracy: 0.9049 - val_loss: 0.2258 - val_accuracy: 0.9120 - 17s/epoch - 223ms/step Epoch 14/100 76/76 - 17s - loss: 0.2141 - accuracy: 0.9132 - val_loss: 0.2129 - val_accuracy: 0.9219 - 17s/epoch - 220ms/step Epoch 15/100 76/76 - 17s - loss: 0.2043 - accuracy: 0.9203 - val_loss: 0.3349 - val_accuracy: 0.8555 - 17s/epoch - 226ms/step Epoch 16/100 76/76 - 17s - loss: 0.2300 - accuracy: 0.8995 - val_loss: 0.2293 - val_accuracy: 0.9053 - 17s/epoch - 
225ms/step Epoch 17/100 76/76 - 16s - loss: 0.1872 - accuracy: 0.9277 - val_loss: 0.2377 - val_accuracy: 0.9103 - 16s/epoch - 211ms/step Epoch 18/100 76/76 - 17s - loss: 0.1888 - accuracy: 0.9261 - val_loss: 0.2732 - val_accuracy: 0.8920 - 17s/epoch - 225ms/step Epoch 19/100 76/76 - 17s - loss: 0.1820 - accuracy: 0.9282 - val_loss: 0.2225 - val_accuracy: 0.9153 - 17s/epoch - 223ms/step Epoch 20/100 76/76 - 16s - loss: 0.1594 - accuracy: 0.9344 - val_loss: 0.2087 - val_accuracy: 0.9236 - 16s/epoch - 217ms/step Epoch 21/100 76/76 - 16s - loss: 0.1544 - accuracy: 0.9390 - val_loss: 0.1936 - val_accuracy: 0.9336 - 16s/epoch - 209ms/step Epoch 22/100 76/76 - 17s - loss: 0.1593 - accuracy: 0.9365 - val_loss: 0.2085 - val_accuracy: 0.9219 - 17s/epoch - 227ms/step Epoch 23/100 76/76 - 16s - loss: 0.1626 - accuracy: 0.9352 - val_loss: 0.2028 - val_accuracy: 0.9269 - 16s/epoch - 206ms/step Epoch 24/100 76/76 - 17s - loss: 0.1783 - accuracy: 0.9273 - val_loss: 0.1780 - val_accuracy: 0.9385 - 17s/epoch - 230ms/step Epoch 25/100 76/76 - 16s - loss: 0.1688 - accuracy: 0.9323 - val_loss: 0.1927 - val_accuracy: 0.9203 - 16s/epoch - 214ms/step Epoch 26/100 76/76 - 16s - loss: 0.1394 - accuracy: 0.9464 - val_loss: 0.1929 - val_accuracy: 0.9302 - 16s/epoch - 214ms/step Epoch 27/100 76/76 - 15s - loss: 0.1420 - accuracy: 0.9402 - val_loss: 0.1962 - val_accuracy: 0.9269 - 15s/epoch - 196ms/step Epoch 28/100 76/76 - 16s - loss: 0.1339 - accuracy: 0.9498 - val_loss: 0.1887 - val_accuracy: 0.9369 - 16s/epoch - 216ms/step Epoch 29/100 76/76 - 17s - loss: 0.1427 - accuracy: 0.9394 - val_loss: 0.3047 - val_accuracy: 0.8953 - 17s/epoch - 227ms/step Epoch 30/100 76/76 - 15s - loss: 0.1431 - accuracy: 0.9414 - val_loss: 0.1752 - val_accuracy: 0.9468 - 15s/epoch - 193ms/step Epoch 31/100 76/76 - 16s - loss: 0.1152 - accuracy: 0.9572 - val_loss: 0.2411 - val_accuracy: 0.9336 - 16s/epoch - 212ms/step Epoch 32/100 76/76 - 14s - loss: 0.1417 - accuracy: 0.9448 - val_loss: 0.1903 - val_accuracy: 
0.9369 - 14s/epoch - 186ms/step Epoch 33/100 76/76 - 14s - loss: 0.1071 - accuracy: 0.9601 - val_loss: 0.1680 - val_accuracy: 0.9419 - 14s/epoch - 186ms/step Epoch 34/100 76/76 - 15s - loss: 0.1166 - accuracy: 0.9556 - val_loss: 0.2033 - val_accuracy: 0.9252 - 15s/epoch - 200ms/step Epoch 35/100 76/76 - 15s - loss: 0.1347 - accuracy: 0.9502 - val_loss: 0.1781 - val_accuracy: 0.9302 - 15s/epoch - 202ms/step Epoch 36/100 76/76 - 16s - loss: 0.1104 - accuracy: 0.9572 - val_loss: 0.1733 - val_accuracy: 0.9402 - 16s/epoch - 206ms/step Epoch 37/100 76/76 - 17s - loss: 0.1254 - accuracy: 0.9531 - val_loss: 0.2208 - val_accuracy: 0.9336 - 17s/epoch - 218ms/step Epoch 38/100 76/76 - 15s - loss: 0.1041 - accuracy: 0.9597 - val_loss: 0.1790 - val_accuracy: 0.9485 - 15s/epoch - 196ms/step Epoch 39/100 76/76 - 16s - loss: 0.0946 - accuracy: 0.9655 - val_loss: 0.1798 - val_accuracy: 0.9502 - 16s/epoch - 211ms/step Epoch 40/100 76/76 - 15s - loss: 0.1012 - accuracy: 0.9643 - val_loss: 0.1592 - val_accuracy: 0.9502 - 15s/epoch - 202ms/step Epoch 41/100 76/76 - 15s - loss: 0.0918 - accuracy: 0.9659 - val_loss: 0.2179 - val_accuracy: 0.9402 - 15s/epoch - 197ms/step Epoch 42/100 76/76 - 16s - loss: 0.0894 - accuracy: 0.9722 - val_loss: 0.1556 - val_accuracy: 0.9585 - 16s/epoch - 212ms/step Epoch 43/100 76/76 - 16s - loss: 0.1121 - accuracy: 0.9551 - val_loss: 0.2255 - val_accuracy: 0.9269 - 16s/epoch - 208ms/step Epoch 44/100 76/76 - 15s - loss: 0.1213 - accuracy: 0.9522 - val_loss: 0.1698 - val_accuracy: 0.9551 - 15s/epoch - 198ms/step Epoch 45/100 76/76 - 15s - loss: 0.0888 - accuracy: 0.9697 - val_loss: 0.1836 - val_accuracy: 0.9452 - 15s/epoch - 195ms/step Epoch 46/100 76/76 - 16s - loss: 0.0887 - accuracy: 0.9689 - val_loss: 0.1350 - val_accuracy: 0.9585 - 16s/epoch - 209ms/step Epoch 47/100 76/76 - 16s - loss: 0.0971 - accuracy: 0.9635 - val_loss: 0.1350 - val_accuracy: 0.9485 - 16s/epoch - 210ms/step Epoch 48/100 76/76 - 18s - loss: 0.0747 - accuracy: 0.9709 - val_loss: 0.2554 
- val_accuracy: 0.9269 - 18s/epoch - 242ms/step Epoch 49/100 76/76 - 18s - loss: 0.0769 - accuracy: 0.9709 - val_loss: 0.1550 - val_accuracy: 0.9585 - 18s/epoch - 237ms/step Epoch 50/100 76/76 - 16s - loss: 0.1267 - accuracy: 0.9593 - val_loss: 0.1782 - val_accuracy: 0.9385 - 16s/epoch - 212ms/step Epoch 51/100 76/76 - 16s - loss: 0.0913 - accuracy: 0.9680 - val_loss: 0.2932 - val_accuracy: 0.9302 - 16s/epoch - 214ms/step Epoch 52/100 76/76 - 13s - loss: 0.0914 - accuracy: 0.9655 - val_loss: 0.1593 - val_accuracy: 0.9585 - 13s/epoch - 168ms/step Epoch 53/100 76/76 - 17s - loss: 0.0785 - accuracy: 0.9726 - val_loss: 0.2050 - val_accuracy: 0.9485 - 17s/epoch - 219ms/step Epoch 54/100 76/76 - 16s - loss: 0.0830 - accuracy: 0.9705 - val_loss: 0.2049 - val_accuracy: 0.9535 - 16s/epoch - 207ms/step Epoch 55/100 76/76 - 16s - loss: 0.0831 - accuracy: 0.9705 - val_loss: 0.1584 - val_accuracy: 0.9535 - 16s/epoch - 216ms/step Epoch 56/100 76/76 - 16s - loss: 0.0707 - accuracy: 0.9763 - val_loss: 0.1617 - val_accuracy: 0.9635 - 16s/epoch - 216ms/step Epoch 57/100 76/76 - 16s - loss: 0.0703 - accuracy: 0.9759 - val_loss: 0.1483 - val_accuracy: 0.9585 - 16s/epoch - 210ms/step Epoch 58/100 76/76 - 16s - loss: 0.0845 - accuracy: 0.9705 - val_loss: 0.1841 - val_accuracy: 0.9485 - 16s/epoch - 213ms/step Epoch 59/100 76/76 - 16s - loss: 0.0998 - accuracy: 0.9643 - val_loss: 0.1372 - val_accuracy: 0.9568 - 16s/epoch - 217ms/step Epoch 60/100 76/76 - 16s - loss: 0.0652 - accuracy: 0.9759 - val_loss: 0.1843 - val_accuracy: 0.9535 - 16s/epoch - 216ms/step Epoch 61/100 76/76 - 16s - loss: 0.0653 - accuracy: 0.9784 - val_loss: 0.1337 - val_accuracy: 0.9535 - 16s/epoch - 215ms/step Epoch 62/100 76/76 - 16s - loss: 0.0684 - accuracy: 0.9780 - val_loss: 0.1441 - val_accuracy: 0.9618 - 16s/epoch - 214ms/step Epoch 63/100 76/76 - 16s - loss: 0.0616 - accuracy: 0.9805 - val_loss: 0.1402 - val_accuracy: 0.9551 - 16s/epoch - 215ms/step Epoch 64/100 76/76 - 15s - loss: 0.0589 - accuracy: 0.9788 - 
val_loss: 0.1705 - val_accuracy: 0.9618 - 15s/epoch - 202ms/step Epoch 65/100 76/76 - 16s - loss: 0.0697 - accuracy: 0.9743 - val_loss: 0.1747 - val_accuracy: 0.9601 - 16s/epoch - 214ms/step Epoch 66/100 76/76 - 16s - loss: 0.0861 - accuracy: 0.9730 - val_loss: 0.1456 - val_accuracy: 0.9585 - 16s/epoch - 214ms/step Epoch 67/100 76/76 - 15s - loss: 0.0732 - accuracy: 0.9738 - val_loss: 0.1213 - val_accuracy: 0.9668 - 15s/epoch - 196ms/step Epoch 68/100 76/76 - 14s - loss: 0.0716 - accuracy: 0.9755 - val_loss: 0.1174 - val_accuracy: 0.9701 - 14s/epoch - 186ms/step Epoch 69/100 76/76 - 16s - loss: 0.0669 - accuracy: 0.9797 - val_loss: 0.1388 - val_accuracy: 0.9684 - 16s/epoch - 213ms/step Epoch 70/100 76/76 - 16s - loss: 0.0675 - accuracy: 0.9763 - val_loss: 0.1923 - val_accuracy: 0.9551 - 16s/epoch - 207ms/step Epoch 71/100 76/76 - 16s - loss: 0.1137 - accuracy: 0.9589 - val_loss: 0.1445 - val_accuracy: 0.9651 - 16s/epoch - 207ms/step Epoch 72/100 76/76 - 16s - loss: 0.0899 - accuracy: 0.9693 - val_loss: 0.1617 - val_accuracy: 0.9535 - 16s/epoch - 217ms/step Epoch 73/100 76/76 - 15s - loss: 0.0567 - accuracy: 0.9797 - val_loss: 0.1743 - val_accuracy: 0.9485 - 15s/epoch - 196ms/step Epoch 74/100 76/76 - 15s - loss: 0.0797 - accuracy: 0.9726 - val_loss: 0.1258 - val_accuracy: 0.9651 - 15s/epoch - 204ms/step Epoch 75/100 76/76 - 13s - loss: 0.0857 - accuracy: 0.9701 - val_loss: 0.2392 - val_accuracy: 0.9551 - 13s/epoch - 174ms/step Epoch 76/100 76/76 - 16s - loss: 0.0912 - accuracy: 0.9693 - val_loss: 0.1359 - val_accuracy: 0.9701 - 16s/epoch - 216ms/step Epoch 77/100 76/76 - 16s - loss: 0.0640 - accuracy: 0.9797 - val_loss: 0.1384 - val_accuracy: 0.9601 - 16s/epoch - 212ms/step Epoch 78/100 76/76 - 17s - loss: 0.0800 - accuracy: 0.9693 - val_loss: 0.1551 - val_accuracy: 0.9618 - 17s/epoch - 227ms/step Epoch 79/100 76/76 - 15s - loss: 0.0571 - accuracy: 0.9813 - val_loss: 0.1663 - val_accuracy: 0.9618 - 15s/epoch - 199ms/step Epoch 80/100 76/76 - 16s - loss: 0.0482 - 
accuracy: 0.9813 - val_loss: 0.1543 - val_accuracy: 0.9701 - 16s/epoch - 216ms/step Epoch 81/100 76/76 - 17s - loss: 0.0588 - accuracy: 0.9792 - val_loss: 0.1239 - val_accuracy: 0.9701 - 17s/epoch - 219ms/step Epoch 82/100 76/76 - 17s - loss: 0.0460 - accuracy: 0.9842 - val_loss: 0.1696 - val_accuracy: 0.9668 - 17s/epoch - 221ms/step Epoch 83/100 76/76 - 14s - loss: 0.0470 - accuracy: 0.9830 - val_loss: 0.2891 - val_accuracy: 0.9485 - 14s/epoch - 187ms/step Epoch 84/100 76/76 - 16s - loss: 0.0825 - accuracy: 0.9693 - val_loss: 0.1597 - val_accuracy: 0.9585 - 16s/epoch - 216ms/step Epoch 85/100 76/76 - 16s - loss: 0.0517 - accuracy: 0.9817 - val_loss: 0.2136 - val_accuracy: 0.9568 - 16s/epoch - 216ms/step Epoch 86/100 76/76 - 15s - loss: 0.0504 - accuracy: 0.9834 - val_loss: 0.1814 - val_accuracy: 0.9518 - 15s/epoch - 200ms/step Epoch 87/100 76/76 - 13s - loss: 0.0526 - accuracy: 0.9821 - val_loss: 0.1680 - val_accuracy: 0.9618 - 13s/epoch - 178ms/step Epoch 88/100 76/76 - 16s - loss: 0.0420 - accuracy: 0.9871 - val_loss: 0.2491 - val_accuracy: 0.9286 - 16s/epoch - 211ms/step Epoch 89/100 76/76 - 14s - loss: 0.0536 - accuracy: 0.9792 - val_loss: 0.1871 - val_accuracy: 0.9618 - 14s/epoch - 179ms/step Epoch 90/100 76/76 - 16s - loss: 0.0711 - accuracy: 0.9780 - val_loss: 0.1829 - val_accuracy: 0.9585 - 16s/epoch - 214ms/step Epoch 91/100 76/76 - 16s - loss: 0.0688 - accuracy: 0.9784 - val_loss: 0.1982 - val_accuracy: 0.9568 - 16s/epoch - 206ms/step Epoch 92/100 76/76 - 17s - loss: 0.0483 - accuracy: 0.9826 - val_loss: 0.1974 - val_accuracy: 0.9551 - 17s/epoch - 220ms/step Epoch 93/100 76/76 - 16s - loss: 0.0744 - accuracy: 0.9805 - val_loss: 0.1405 - val_accuracy: 0.9551 - 16s/epoch - 209ms/step Epoch 94/100 76/76 - 15s - loss: 0.0579 - accuracy: 0.9830 - val_loss: 0.1545 - val_accuracy: 0.9618 - 15s/epoch - 201ms/step Epoch 95/100 76/76 - 16s - loss: 0.0542 - accuracy: 0.9805 - val_loss: 0.2258 - val_accuracy: 0.9535 - 16s/epoch - 215ms/step Epoch 96/100 76/76 - 16s 
- loss: 0.0723 - accuracy: 0.9767 - val_loss: 0.1247 - val_accuracy: 0.9684 - 16s/epoch - 206ms/step Epoch 97/100 76/76 - 15s - loss: 0.0638 - accuracy: 0.9792 - val_loss: 0.1473 - val_accuracy: 0.9635 - 15s/epoch - 192ms/step Epoch 98/100 76/76 - 17s - loss: 0.0502 - accuracy: 0.9830 - val_loss: 0.1383 - val_accuracy: 0.9668 - 17s/epoch - 229ms/step Epoch 99/100 76/76 - 17s - loss: 0.0734 - accuracy: 0.9772 - val_loss: 0.1141 - val_accuracy: 0.9684 - 17s/epoch - 218ms/step Epoch 100/100 76/76 - 15s - loss: 0.0671 - accuracy: 0.9738 - val_loss: 0.1801 - val_accuracy: 0.9635 - 15s/epoch - 199ms/step
In [ ]:
# persist the trained model (TensorFlow SavedModel format directory)
best_model.save('./models/best_model')
2024-07-20 17:51:41.913399: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,6272]
[[{{node inputs}}]]
2024-07-20 17:51:42.059944: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,6272]
[[{{node inputs}}]]
WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _update_step_xla while saving (showing 4 of 4). These functions will not be directly callable after loading.
INFO:tensorflow:Assets written to: ./models/best_model/assets
INFO:tensorflow:Assets written to: ./models/best_model/assets
In [ ]:
# reload the saved model from disk and display its architecture summary
best_model = tf.keras.models.load_model("models/best_model")
best_model.summary()
WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.RestoredOptimizer` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.RestoredOptimizer`. WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.Adam` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.Adam`. WARNING:absl:There is a known slowdown when using v2.11+ Keras optimizers on M1/M2 Macs. Falling back to the legacy Keras optimizer, i.e., `tf.keras.optimizers.legacy.Adam`.
Model: "sequential_92"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
rescaling_92 (Rescaling) (None, 240, 240, 3) 0
conv2d_276 (Conv2D) (None, 238, 238, 24) 672
max_pooling2d_276 (MaxPooli (None, 119, 119, 24) 0
ng2D)
conv2d_277 (Conv2D) (None, 117, 117, 12) 2604
max_pooling2d_277 (MaxPooli (None, 58, 58, 12) 0
ng2D)
conv2d_278 (Conv2D) (None, 56, 56, 8) 872
max_pooling2d_278 (MaxPooli (None, 28, 28, 8) 0
ng2D)
flatten_92 (Flatten) (None, 6272) 0
dropout_92 (Dropout) (None, 6272) 0
dense_184 (Dense) (None, 4) 25092
dense_185 (Dense) (None, 2) 10
=================================================================
Total params: 29,250
Trainable params: 29,250
Non-trainable params: 0
_________________________________________________________________
In [ ]:
# plot_history is presumably a helper defined earlier in the notebook that
# plots training/validation curves from a Keras History object — verify.
plot_history(best_model_hist)
In [ ]:
# Reload the saved model, predict class labels on the test set (argmax over
# the two output logits), and run the shared evaluation helper
# (full_evaluation is defined earlier in the notebook).
best_model = tf.keras.models.load_model("models/best_model")
best_model_pred = best_model.predict(test_ds).argmax(axis=1)
full_evaluation(test_labels, best_model_pred, 'best_model')
WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.RestoredOptimizer` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.RestoredOptimizer`. WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.Adam` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.Adam`. WARNING:absl:There is a known slowdown when using v2.11+ Keras optimizers on M1/M2 Macs. Falling back to the legacy Keras optimizer, i.e., `tf.keras.optimizers.legacy.Adam`.
24/24 [==============================] - 1s 60ms/step
Accuracy: 0.9694
precision recall f1-score support
0.0 0.94 0.99 0.97 336
1.0 0.99 0.95 0.97 416
accuracy 0.97 752
macro avg 0.97 0.97 0.97 752
weighted avg 0.97 0.97 0.97 752
Coefficient of determination: 0.8763
Mean squared error: 0.0306
Mean absolute error: 0.0306
In [ ]:
# ROC curve for the best model on the image test set.
# NOTE(review): best_model_pred contains hard class labels (argmax), so this
# ROC has only a few points; computing it from predicted probabilities or
# logit scores would give a more informative curve — confirm intent.
fpr, tpr, thresholds = roc_curve(test_labels, best_model_pred)
roc_auc = auc(fpr, tpr)
plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.4f)' % roc_auc)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.savefig('plots/roc_curve_best_model.pdf')
Compare Recall scores¶
In [ ]:
def calc_recall_precision(test, predict):
    """Return (recall, precision) for the positive class (label 1).

    Parameters
    ----------
    test : array-like of true binary labels (0/1)
    predict : array-like of predicted binary labels (0/1)
    """
    # confusion_matrix layout: rows = true label, columns = predicted label
    cm = confusion_matrix(test, predict)
    recall = cm[1, 1] / (cm[1, 1] + cm[1, 0])
    precision = cm[1, 1] / (cm[1, 1] + cm[0, 1])
    return recall, precision
# Ground-truth labels for the 752 test images: the first 336 are class 0
# (no tumor), the remaining 416 are class 1 (tumor).
test_labels = np.concatenate([np.zeros(336), np.ones(752 - 336)])
In [ ]:
# Compare all models on their respective test sets. The classical models
# (SVM, LR, kNN) were evaluated on the tabular split (y_test); the CNNs were
# evaluated on the image test set (test_labels).
# IMPROVED: the original duplicated every metric computation in both branches
# of an if/else; select the label vector once and compute the metrics once.
compare = []
for model, pred in zip(['SVM', 'LR', 'kNN', 'CNN', 'best_model'],
                       [svm_pred, lr_pred, knn_pred, cnn_pred, best_model_pred]):
    labels = test_labels if model in ('CNN', 'best_model') else y_test
    accuracy = accuracy_score(labels, pred)
    recall, precision = calc_recall_precision(labels, pred)
    mse = mean_squared_error(labels, pred)
    mae = mean_absolute_error(labels, pred)
    r2 = r2_score(labels, pred)
    compare.append({'model': model,
                    'recall': recall,
                    'accuracy': accuracy,
                    'precision': precision,
                    # harmonic mean of precision and recall
                    'f1': 2 * (precision * recall) / (precision + recall),
                    'r2': r2,
                    'mse': mse,
                    'mae': mae})
In [ ]:
# rank all models by recall on the positive (tumor) class
recallComp = pd.DataFrame(compare)
# NOTE(review): sort_values returns a sorted copy for display only;
# recallComp itself stays in insertion order.
recallComp.sort_values('recall', ascending=False)
Out[ ]:
| model | recall | accuracy | precision | f1 | r2 | mse | mae | |
|---|---|---|---|---|---|---|---|---|
| 0 | SVM | 0.961652 | 0.980080 | 0.993902 | 0.977511 | 0.919520 | 0.019920 | 0.019920 |
| 1 | LR | 0.961652 | 0.978752 | 0.990881 | 0.976048 | 0.914155 | 0.021248 | 0.021248 |
| 4 | best_model | 0.951923 | 0.969415 | 0.992481 | 0.971779 | 0.876259 | 0.030585 | 0.030585 |
| 2 | kNN | 0.946903 | 0.976096 | 1.000000 | 0.972727 | 0.903424 | 0.023904 | 0.023904 |
| 3 | CNN | 0.872596 | 0.922872 | 0.986413 | 0.926020 | 0.687958 | 0.077128 | 0.077128 |
In [ ]:
# persist the model-comparison table to disk
recallComp.to_csv('Comparison-AllModels.csv', index=False)
In [ ]: